Load data: 246 subjects with 845 goals are included in the following analysis.
# Read the baseline inputs (long-format goal ratings, wide per-subject
# summary, individual differences) and previously generated outputs
# (listed goal text, wide-format ratings).
goalRating_long_R <- read.csv(
  here("Baseline", "Inputs", "goalRating_long_R.csv"),
  stringsAsFactors = FALSE
)
wideDf <- read.csv(
  here("Baseline", "Inputs", "wideDf.csv"),
  stringsAsFactors = FALSE
)
indivDiffDf <- read.csv(
  here("Baseline", "Inputs", "indivDiffDf.csv"),
  stringsAsFactors = FALSE
)
goalListDf <- read.csv(
  here("Baseline", "Outputs", "listedGoals.csv"),
  stringsAsFactors = FALSE
)
goalDf_wide <- read.csv(here("Baseline", "Outputs", "goalDf_wide.csv"))
Check the number of missing data points per variable; the top 5 variables are shown below. Missing data is rare for all variables except external_importance (5% of the goals had missing data), because of a glitch in the display logic for one of the goal blocks.
# check the number of missing data
# Each goal was rated on 41 variables, so rows / 41 = number of goals.
totalGoal <- nrow(goalRating_long_R) / 41
# Tally missing ratings per variable and show the 5 worst offenders
# (ordering by n is equivalent to ordering by percent, a constant multiple).
goalRating_long_R %>%
  filter(is.na(rating)) %>%
  tabyl(variable) %>%
  mutate(percent = n / totalGoal) %>%
  arrange(desc(n)) %>%
  head(5)
## variable n percent
## external_importance 42 0.049704142
## end_state_specificity_R 6 0.007100592
## attractiveness_achievement 3 0.003550296
## commitment 2 0.002366864
## commonality 2 0.002366864
The “construal_level”, “approach_avoidance” and “attainment_maintenance” questions have an “I’m not sure” option because they ask subjects to categorize their goals.
Around 1% of the goals had “I’m not sure” as the response.
# check the number of "I'm not sure" responses per variable
# ("I'm not sure" is coded as 99 for the three categorization questions)
goalRating_long_R %>%
  filter(rating %in% 99) %>%
  tabyl(variable) %>%
  mutate(percent = n / totalGoal) %>%
  arrange(desc(n))
## variable n percent
## attainment_maintenance_R 13 0.01538462
## approach_avoidance_R 11 0.01301775
## construal_level 11 0.01301775
The temporal_duration, frequency and end_state_specificity questions have a “not specified” option because they ask about features that may not apply to all goals.
Around 5% of the responses are “not specified”.
# check the number of "not specified" responses per variable
# ("not specified" is coded as 999 for the three applicability questions)
goalRating_long_R %>%
  filter(rating %in% 999) %>%
  tabyl(variable) %>%
  mutate(percent = n / totalGoal) %>%
  arrange(desc(n))
## variable n percent
## temporal_duration 41 0.04852071
## end_state_specificity_R 40 0.04733728
## frequency_R 25 0.02958580
All “I’m not sure” and “not specified” responses will be treated as missing data.
# transform 99 & 999 to NAs
# Both special codes ("I'm not sure" = 99, "not specified" = 999) are
# treated as missing from here on. %in% returns FALSE for NA ratings,
# so existing NAs are left untouched.
goalRating_long_R <- goalRating_long_R %>%
  mutate(rating = replace(rating, rating %in% c(99, 999), NA))
Descriptive on the number of goals subject claimed to have prior to listing them (lower than the previous mTurk study: mean = 4.4)
# Summary statistics (psych::describe) for the number of goals each
# subject claimed to have before listing them.
describe(wideDf$total_goal)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 246 3.68 2.9 3 3.31 1.48 1 40 39 8.47 98.79 0.18
Visualize the number of claimed goals per subject after excluding the extreme value (> 20) (1 claimed 40)
# Histogram of claimed goals per subject, excluding the one extreme
# responder (claimed 40).
# FIX: use `<-` for assignment instead of `=` (tidyverse style).
# NOTE(review): filter(total_goal < 20) would also drop a subject with
# exactly 20 goals — confirm none exists in this sample.
breaks <- 1:20
wideDf %>%
  filter(total_goal < 20) %>%
  ggplot(aes(x = total_goal)) +
  scale_x_continuous(labels = scales::comma(breaks, accuracy = 1), breaks = breaks) +
  geom_histogram(fill = "orange",
                 colour = "black",
                 binwidth = 1) +
  labs(x = "Number of claimed goals", y = "# of participants") +
  theme_classic(base_size = 18)
The percentage of subjects who claimed having more than 5 goals: 9.8%
# get the total number of subjects
totalSub <- nrow(indivDiffDf)
# Proportion of subjects who claimed more than 5 goals.
# NOTE(review): length(x[x > 5]) would also count NA entries (NA
# comparisons yield NA indices) — confirm total_goal has no NAs.
length(wideDf$total_goal[wideDf$total_goal>5])/totalSub
## [1] 0.09756098
Descriptives for the number of goals participants actually listed (similar to all previous studies), but the distribution was different (in the previous mTurk data, 5 goals had the highest frequency).
# Summary statistics for the number of goals actually listed (1-5).
describe(wideDf$listNum)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 246 3.43 1.14 3 3.46 1.48 1 5 4 0.01 -0.92 0.07
# Histogram of the number of goals actually listed (1-5 per subject).
breaks <- 1:5
wideDf %>%
  ggplot(aes(x = listNum)) +
  scale_x_continuous(labels = scales::comma(breaks, accuracy = 1), breaks = breaks) +
  geom_histogram(fill = "orange",
                 colour = "black",
                 binwidth = 1) +
  labs(x = "Number of listed goals", y = "# of participants") +
  theme_classic(base_size = 18)
number of people who listed 1 goal: 9 (less than previous)
# Number of subjects who listed exactly 1 goal.
length(wideDf$listNum[wideDf$listNum == 1])
## [1] 9
Descriptives for the difference between the number of claimed goals and listed goals (after excluding the extreme case).
# Difference between claimed and listed goal counts: positive values mean
# the subject claimed more goals than they listed.
wideDf <- mutate(wideDf, diffNum = total_goal - listNum)
# Drop the extreme responder before describing the distribution.
goalDf_sum_wide_clean <- filter(wideDf, total_goal < 20)
describe(goalDf_sum_wide_clean$diffNum)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 245 0.11 1.24 0 0.01 0 -4 10 14 3.61 24.47 0.08
# Histogram of claimed-minus-listed goal counts.
breaks <- -4:15
goalDf_sum_wide_clean %>%
  ggplot(aes(x = diffNum)) +
  scale_x_continuous(labels = scales::comma(breaks, accuracy = 1), breaks = breaks) +
  geom_histogram(fill = "orange",
                 colour = "black",
                 binwidth = 1) +
  labs(x = "Number of claimed goals - listed goals", y = "# of participants") +
  theme_classic(base_size = 18)
percentage of people who listed more goals than they claimed: 13% (less than previous mTurk)
# Percentage of subjects who listed MORE goals than they claimed
# (diffNum = total_goal - listNum, so negative means listed > claimed).
length(wideDf$diffNum[wideDf$diffNum <0])/totalSub *100
## [1] 13.82114
percentage of people who listed fewer goals than they claimed: ~15%
# Percentage of subjects who listed FEWER goals than they claimed
# (positive diffNum means claimed > listed).
length(wideDf$diffNum[wideDf$diffNum >0])/totalSub *100
## [1] 14.63415
# descriptive stats for each variable
# Per-variable descriptives over all 845 goals, sorted by skew;
# skew() and kurtosi() come from the psych package.
# FIX: spell out TRUE/FALSE instead of T/F (T and F are reassignable).
mTurk_b_descriptive <- goalRating_long_R %>%
  dplyr::select(variable, rating) %>%
  group_by(variable) %>%
  summarize(mean = mean(rating, na.rm = TRUE),
            sd = sd(rating, na.rm = TRUE),
            n = n(),
            min = min(rating, na.rm = TRUE),
            max = max(rating, na.rm = TRUE),
            skew = skew(rating, na.rm = TRUE),
            kurtosi = kurtosi(rating, na.rm = TRUE)) %>%
  arrange(skew)
#write.csv(goalRating_long_R, here("Baseline", "Outputs", "goalRating_long_R.csv"), row.names = F)
# Render the descriptives as an HTML table.
mTurk_b_descriptive %>%
  kable(format = "html", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center")
| variable | mean | sd | n | min | max | skew | kurtosi |
|---|---|---|---|---|---|---|---|
| approach_avoidance_R | 6.224221 | 1.5452637 | 845 | 1 | 7 | -2.1727849 | 3.8432880 |
| control | 6.253855 | 1.1856600 | 845 | 1 | 7 | -1.9338349 | 3.8044055 |
| specificity | 6.055687 | 1.3898389 | 845 | 1 | 7 | -1.6669884 | 2.2108652 |
| identified_motivation | 6.215385 | 1.1115507 | 845 | 1 | 7 | -1.5736792 | 2.4322229 |
| measurability | 6.098341 | 1.3091691 | 845 | 1 | 7 | -1.4236411 | 1.3002904 |
| clarity | 6.082840 | 1.0798681 | 845 | 1 | 7 | -1.2753662 | 1.5490704 |
| attractiveness_achievement | 6.042755 | 1.0231917 | 845 | 1 | 7 | -1.2428597 | 2.0018518 |
| self_resources | 6.071174 | 1.0631651 | 845 | 1 | 7 | -1.2018394 | 1.5139371 |
| importance | 6.127811 | 1.1623010 | 845 | 2 | 7 | -1.1990699 | 0.5219177 |
| commitment | 6.111506 | 1.1354357 | 845 | 2 | 7 | -1.1282786 | 0.3001185 |
| ideal_motivation | 5.571598 | 1.5971384 | 845 | 1 | 7 | -1.1191065 | 0.5972925 |
| social_desirability | 5.865955 | 1.2755013 | 845 | 1 | 7 | -1.0896227 | 0.8126483 |
| initial_time_R | 6.137278 | 1.7524782 | 845 | 1 | 8 | -1.0527170 | 0.8437848 |
| basic_needs | 5.381065 | 1.8502264 | 845 | 1 | 7 | -1.0309408 | -0.0461374 |
| attractiveness_progress | 5.940828 | 1.0274903 | 845 | 1 | 7 | -0.9945787 | 1.0384554 |
| implementation_intention | 5.621590 | 1.3803366 | 845 | 1 | 7 | -0.9066333 | 0.2110371 |
| regret | 5.572275 | 1.4856335 | 845 | 1 | 7 | -0.8200699 | -0.1315040 |
| temporal_duration | 3.116915 | 0.6924337 | 845 | 1 | 4 | -0.7205211 | 1.0806291 |
| commonality | 5.026097 | 1.8880251 | 845 | 1 | 7 | -0.7175472 | -0.5650281 |
| end_state_specificity_R | 2.315394 | 0.8514477 | 845 | 1 | 3 | -0.6497471 | -1.3114261 |
| instrumentality | 5.036730 | 1.8725277 | 845 | 1 | 7 | -0.6476131 | -0.6762803 |
| attainability | 8.223669 | 2.0712220 | 845 | 2 | 12 | -0.5713721 | -0.1369239 |
| other_resources | 4.795266 | 1.7671238 | 845 | 1 | 7 | -0.5505078 | -0.5271092 |
| meaningfulness | 4.940758 | 1.7551949 | 845 | 1 | 7 | -0.4488154 | -0.7473833 |
| affordance | 5.268639 | 1.4399591 | 845 | 1 | 7 | -0.4303517 | -0.6494085 |
| urgency | 4.931361 | 1.6145146 | 845 | 1 | 7 | -0.4220002 | -0.5668751 |
| external_importance | 4.570361 | 2.0791011 | 845 | 1 | 7 | -0.3385082 | -1.1988098 |
| attainment_maintenance_R | 4.484375 | 2.4479656 | 845 | 1 | 7 | -0.3282102 | -1.5504706 |
| visibility | 4.432977 | 2.0897224 | 845 | 1 | 7 | -0.2851378 | -1.1801299 |
| difficulty | 5.042604 | 1.4918683 | 845 | 1 | 7 | -0.2737788 | -0.6725020 |
| effort | 4.882701 | 1.6071413 | 845 | 1 | 7 | -0.2612870 | -0.8099205 |
| failure | 1.979882 | 0.9227624 | 845 | 1 | 3 | 0.0397350 | -1.8258040 |
| construal_level | 3.990396 | 1.9999769 | 845 | 1 | 7 | 0.0555066 | -1.1889644 |
| connectedness | 3.856465 | 1.9621212 | 845 | 1 | 7 | 0.0639689 | -1.1150778 |
| introjected_motivation | 3.623669 | 2.1497070 | 845 | 1 | 7 | 0.1725515 | -1.3866973 |
| procrastination | 3.571090 | 1.7645294 | 845 | 1 | 7 | 0.2363466 | -0.8945785 |
| intrinsic_motivation | 3.511848 | 2.1328592 | 845 | 1 | 7 | 0.2576867 | -1.3216728 |
| external_motivation | 3.305687 | 2.1323728 | 845 | 1 | 7 | 0.3857426 | -1.2700443 |
| frequency_R | 1.364634 | 0.4816212 | 845 | 1 | 2 | 0.5614403 | -1.6868374 |
| ought_motivation | 2.725444 | 1.9593931 | 845 | 1 | 7 | 0.8769575 | -0.5325835 |
| conflict | 2.189798 | 1.4692543 | 845 | 1 | 7 | 1.2706138 | 0.9993013 |
# order based on their skewness
#kable(varDf[order(varDf$skew),])
#mTurk_b_descriptive <- write.csv(mTurk_b_descriptive, here("Baseline", "Outputs", "mTurk_b_descriptive.csv"), row.names = F)
The trend looks more skewed than previous MTurk data
# histograms for each dimension
# One histogram panel per rating variable (7 rows of facets).
ggplot(goalRating_long_R, aes(x = rating)) +
  geom_histogram(fill = "orange", colour = "black", alpha = .6) +
  facet_wrap(~variable, nrow = 7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
“pairwise.complete.obs” is used for generating the correlation matrix. The correlations make sense.
# transform the long format to wide format (one column per rating variable)
goalDf_wide <- spread(goalRating_long_R, variable, rating)
# generate a correlation matrix on pairwise-complete observations
corrM_all <- goalDf_wide %>%
  dplyr::select(affordance:visibility) %>%
  cor(use = "pairwise.complete.obs")
# visualization
corrplot(corrM_all,
         method = "circle", number.cex = .7, order = "AOE",
         addCoef.col = "black", type = "upper",
         col = colorRampPalette(c("midnightblue", "white", "orange"))(200))
# write the wide format
write.csv(goalDf_wide, here("Baseline", "Outputs", "goalDf_wide.csv"), row.names = FALSE)
# attach the ratings to the listed goal text and write out
goalList_rating <- left_join(goalListDf, goalDf_wide,
                             by = c("MTurkCode", "goal_order" = "goal"))
write.csv(goalList_rating, here("Baseline", "Outputs", "goalList_rating.csv"), row.names = FALSE)
#write.csv(corrM_all, here("Baseline", "Outputs", "mTurk_b_corrM_all.csv"), row.names = F)
Only the 31 variables for goal representation are included. Only around 6.7% of the variance is on the between subject level. (less than previous mTurk data)
# subset the long dataset to the 31 goal-representation variables,
# i.e. drop the 10 goal-striving items
goal_striving <- c("commitment", "urgency", "effort", "initial_time_R",
                   "regret", "procrastination", "failure", "self_resources",
                   "other_resources", "implementation_intention")
goalDf_R_long <- goalRating_long_R[!goalRating_long_R$variable %in% goal_striving, ]
# random-intercept model: how much rating variance lies between subjects?
mlm <- lmer(rating ~ variable + (1|MTurkCode), data = goalDf_R_long)
# variance partition coefficient (ICC): each component over the total
VarCorr(mlm) %>%
  as_tibble() %>%
  mutate(icc = vcov / sum(vcov)) %>%
  dplyr::select(grp, icc)
## # A tibble: 2 x 2
## grp icc
## <chr> <dbl>
## 1 MTurkCode 0.0671
## 2 Residual 0.933
# Compute the variance proportions once and keep them under two column
# names: `Raw` for internal use, `mTurk_b_icc` for the paper output.
vc <- as_tibble(VarCorr(mlm))
prop_var <- vc$vcov / sum(vc$vcov)
Raw <- tibble(Raw = prop_var)
# just for paper output
mTurk_b_icc <- tibble(mTurk_b_icc = prop_var)
mTurk_b_icc$variation <- c("between-subject", "residual")
# write.csv(mTurk_b_icc, here("Baseline", "Outputs", "mTurk_b_icc.csv"), row.names = F)
item wise ICC
# create the function
# Fit an intercept-only random-effects model for one rating variable and
# return a one-column tibble with the proportion of variance at each level
# (grouping factor, then residual).
# NOTE(review): `var` and `groupVar` are vectors, not column names of
# `data`; lmer resolves them from this function's environment — confirm
# this is intended.
# FIX: the original ended with an assignment, so the value was returned
# invisibly; return the result explicitly.
varPartition <- function(var, groupVar, data){
  mlm <- lmer(var ~ 1 + (1|groupVar), data = data)
  VarCorr(mlm) %>%
    as_tibble() %>%
    mutate(outputDf = vcov/sum(vcov)) %>%
    dplyr::select(outputDf)
}
# apply the function to each variable after excluding subjects with just 1 goal
# (single-goal subjects contribute no within-subject variance)
df_cleaned <- goalDf_wide %>%
filter(total_goal != 1) %>%
select(-goal, -listNum, -total_goal, -goalType)
# Map varPartition over every rating column; each call returns a one-column
# tibble, which is transposed and reshaped into an item-by-ICC table.
# NOTE(review): t() coerces the tibble through a matrix; this works because
# every cell is numeric, but it is fragile to column-type changes.
mTurk_b_item_ICC <- df_cleaned %>%
select(-MTurkCode) %>%
map(~varPartition(., df_cleaned$MTurkCode, df_cleaned)) %>%
as_tibble() %>%
t() %>%
as.data.frame() %>%
select(V1) %>%
rename("mTurk_b_icc" = "V1") %>%
rownames_to_column("Items")
#write.csv(mTurk_b_item_ICC, here("Baseline", "Outputs", "mTurk_b_item_ICC.csv"), row.names = F)
26 variables are included. Ordinal variables are not included: “temporal_duration”, “end_state_specificity” and “frequency”; approach_avoidance_R & attainment_maintenance_R are also dropped because these 2 variables are more relevant to the phrasing/content of a goal than the perception of a goal. This step is consistent with the previous studies.
# Exclude the 10 variables related to goal striving progress
# (FIX: the original comment said 8, but goal_striving lists 10 items)
goalDf_R_wide <- goalDf_wide[, !names(goalDf_wide) %in% goal_striving]
# Exclude 5 more goal-representation variables (the three ordinal items
# plus the two phrasing-related categorizations) and non-rating columns
goal_exclude <- c("temporal_duration", "end_state_specificity_R", "frequency_R", "attainment_maintenance_R", "approach_avoidance_R")
goalDf_EFA <- goalDf_R_wide[, !names(goalDf_R_wide) %in% goal_exclude]
goalDf_EFA <- subset(goalDf_EFA, select = affordance:visibility)
# Generate a Pearson correlation matrix on pairwise-complete observations
corrM_raw <- cor(goalDf_EFA, use = "pairwise")
# export the datasets and the correlation matrix
write.csv(goalDf_R_wide, here("Baseline", "Outputs", "baseline_goalRap.csv"), row.names = FALSE)
# BUG FIX: the original call contained a stray empty argument
# (write.csv(x, file, , row.names = F)), which raises
# "argument ... is empty" at runtime.
write.csv(corrM_raw, here("Baseline", "Outputs", "baseline_corrRating_raw.csv"), row.names = FALSE)
write.csv(goalDf_EFA, here("Baseline", "Outputs", "baseline_EFA_raw.csv"), row.names = FALSE)
# use Very Simple Structure criterion
res_vss <- psych::nfactors(
  corrM_raw, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres",
  n.obs = 845, title = "Very Simple Structure", use = "pairwise", cor = "cor"
)
# select useful parameters and organize them into a table
vss_table <- cbind(1:10, res_vss$map) %>%
  as_tibble() %>%
  rename(factor = V1, map = V2) %>%
  cbind(res_vss$vss.stats)
vss_table %>%
  select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
  kable(format = "html", escape = FALSE, caption = "VSS output -mTurk") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
| factor | map | fit | complex | eChisq | SRMR | eCRMS | eBIC | eRMS |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.0167181 | 0.5377182 | 1.000000 | 5484.6510 | 0.0999285 | 0.1041827 | 3469.5893 | 0.0999285 |
| 2 | 0.0151357 | 0.6396585 | 1.271966 | 3045.2357 | 0.0744604 | 0.0810946 | 1198.6575 | 0.0744604 |
| 3 | 0.0144926 | 0.6217498 | 1.362641 | 2077.4962 | 0.0615014 | 0.0701224 | 392.6621 | 0.0615014 |
| 4 | 0.0154716 | 0.6290689 | 1.480419 | 1408.0995 | 0.0506328 | 0.0605843 | -121.7299 | 0.0506328 |
| 5 | 0.0159802 | 0.6296766 | 1.701375 | 870.4132 | 0.0398087 | 0.0501236 | -511.1508 | 0.0398087 |
| 6 | 0.0171543 | 0.6412920 | 1.741486 | 505.9865 | 0.0303518 | 0.0403383 | -734.0514 | 0.0303518 |
| 7 | 0.0195055 | 0.6311499 | 1.916554 | 358.2234 | 0.0255383 | 0.0359511 | -747.0278 | 0.0255383 |
| 8 | 0.0226734 | 0.6154710 | 1.932461 | 272.1391 | 0.0222592 | 0.0333248 | -705.0647 | 0.0222592 |
| 9 | 0.0260845 | 0.6019241 | 1.935562 | 180.7963 | 0.0181430 | 0.0290235 | -675.0994 | 0.0181430 |
| 10 | 0.0296189 | 0.5697054 | 1.840247 | 118.1485 | 0.0146666 | 0.0252101 | -623.1785 | 0.0146666 |
# Scree plot: count factors with eigenvalues > 1 and compare against
# the parallel-analysis criterion.
ev <- eigen(corrM_raw)
# Parallel analysis: eigenvalues expected under random data (100 reps)
ap <- parallel(subject=nrow(goalDf_EFA),var=ncol(goalDf_EFA),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
# extract 4 factors
# Promax-rotated minres EFA on the Pearson correlation matrix.
fa_new_4 <-fa(r=corrM_raw, nfactors=4,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")
fa.diagram(fa_new_4)
The factors remain the same but some variables have shifted, such as intrinsic motivation
# visualization
# Bar plot of sorted factor loadings, one facet per factor.
loadings <- fa.sort(fa_new_4)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","Clarity", "External", "Consensus")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")
# Fix facet order and item order (rows reversed so the first item appears
# at the top after coord_flip).
colOrder <- c("Value","Clarity", "External", "Consensus")
rowOrder <- rev(rownames(loadings))
# BUG FIX: the original relied on partial argument matching (`leve=`);
# spell out `levels=` explicitly.
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + #place the factors in separate facets
  geom_bar(stat="identity") + #make the bars
  coord_flip() + #flip the axes so the test names can be horizontal
  #define the fill color gradient: orange=positive, blue=negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("The Four Factor Solution from Study 5 Baseline") +
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") + # salience cutoff
  theme_bw(base_size=10)
interfactor correlation:
Less inter-correlated than the previous dataset
# Inter-factor correlations (Phi) from the promax rotation, relabelled
# with the factor names and rendered as an HTML table.
# FIX: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble();
# also corrected the "Cansensus" typo to "Consensus".
fa_new_4$Phi %>%
  as_tibble() %>%
  dplyr::rename(Value = MR1, Clarity = MR2, External = MR3, Consensus = MR4) %>%
  round(., 2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = FALSE, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
| factor | Value | Clarity | External | Cansensus |
|---|---|---|---|---|
| Value | 1.00 | 0.37 | 0.33 | 0.40 |
| Clarity | 0.37 | 1.00 | -0.07 | 0.18 |
| External | 0.33 | -0.07 | 1.00 | 0.14 |
| Cansensus | 0.40 | 0.18 | 0.14 | 1.00 |
Worse fit and lower cumulative variance explained than the 4-factor model in mTurk 2.
# Model-fit summary for the 4-factor Pearson solution, rendered as HTML.
fit_row <- data.frame(
  sample = "mTurk_l", factors = 4, items = 26, observation = 845,
  chi = fa_new_4$chi, BIC = fa_new_4$BIC, fit = fa_new_4$fit,
  RMSEA = fa_new_4$RMSEA[1], cumVar = max(fa_new_4$Vaccounted[3,]),
  complexity = mean(fa_new_4$complexity)
)
fit_row %>%
  remove_rownames() %>%
  kable(format = "html", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center")
| sample | factors | items | observation | chi | BIC | fit | RMSEA | cumVar | complexity |
|---|---|---|---|---|---|---|---|---|---|
| mTurk_l | 4 | 26 | 845 | 1408.1 | -219.5637 | 0.7446622 | 0.0751402 | 0.3382828 | 1.480419 |
# Store the fit statistics for cross-study comparison.
# CONSISTENCY FIX: label the sample "mTurk_l" to match the displayed table
# above (the original used "mTurk" here but "mTurk_l" in the table).
sona_factorFit <- data.frame(sample = "mTurk_l", factors = 4, items = 26, observation = 845, chi = fa_new_4$chi, BIC = fa_new_4$BIC, fit = fa_new_4$fit, RMSEA = fa_new_4$RMSEA[1], cumVar = max(fa_new_4$Vaccounted[3,]), complexity = mean(fa_new_4$complexity))
#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)
# extract 6 factors
# Promax-rotated minres EFA on the Pearson correlation matrix.
fa_new_6 <-fa(r=corrM_raw, nfactors=6,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")
fa.diagram(fa_new_6)
The value, external, attainability, consensus and measurability factors are similar across datasets. The factor that splits off from the value factor differs across studies.
# visualization
# Bar plot of sorted factor loadings, one facet per factor.
loadings <- fa.sort(fa_new_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","External", "Attainability", "Instrumentality", "Consensus", "Measurability")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")
# Fix facet order and item order (rows reversed so the first item appears
# at the top after coord_flip).
colOrder <- c("Value","External", "Attainability", "Instrumentality", "Consensus", "Measurability")
rowOrder <- rev(rownames(loadings))
# BUG FIX: the original relied on partial argument matching (`leve=`);
# spell out `levels=` explicitly.
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + #place the factors in separate facets
  geom_bar(stat="identity") + #make the bars
  coord_flip() + #flip the axes so the test names can be horizontal
  #define the fill color gradient: orange=positive, blue=negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("The Six Factor Solution from Study 5 Baseline") +
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") + # salience cutoff
  theme_bw(base_size=10)
interfactor correlation:
The value and instrumentality factors have the highest correlation.
# Inter-factor correlations (Phi) from the promax rotation, relabelled
# with the factor names and rendered as an HTML table.
# FIX: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble().
fa_new_6$Phi %>%
  as_tibble() %>%
  dplyr::rename(Value = MR1, External = MR2, Attainability = MR3, Instrumentality = MR4, Consensus = MR5, Measurability = MR6) %>%
  round(., 2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = FALSE, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | Value | External | Attainability | Measurability | Instrumentality | Consensus |
|---|---|---|---|---|---|---|
| Value | 1.00 | 0.35 | 0.37 | 0.49 | 0.34 | 0.04 |
| External | 0.35 | 1.00 | -0.05 | 0.12 | 0.19 | -0.19 |
| Attainability | 0.37 | -0.05 | 1.00 | 0.26 | 0.14 | 0.12 |
| Measurability | 0.49 | 0.12 | 0.26 | 1.00 | 0.24 | 0.05 |
| Instrumentality | 0.34 | 0.19 | 0.14 | 0.24 | 1.00 | 0.27 |
| Consensus | 0.04 | -0.19 | 0.12 | 0.05 | 0.27 | 1.00 |
this 6-factor model fit a bit less than the SONA 2
# Model-fit summary for the 6-factor Pearson solution, rendered as HTML.
fit_row <- data.frame(
  sample = "mTurk_l", factors = 6, items = 26, observation = 845,
  chi = fa_new_6$chi, BIC = fa_new_6$BIC, fit = fa_new_6$fit,
  RMSEA = fa_new_6$RMSEA[1], cumVar = max(fa_new_6$Vaccounted[3,]),
  complexity = mean(fa_new_6$complexity)
)
fit_row %>%
  remove_rownames() %>%
  kable(format = "html", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center")
| sample | factors | items | observation | chi | BIC | fit | RMSEA | cumVar | complexity |
|---|---|---|---|---|---|---|---|---|---|
| mTurk_l | 6 | 26 | 845 | 505.9865 | -563.2142 | 0.8100089 | 0.0562876 | 0.4080015 | 1.741486 |
# Store the 6-factor fit statistics for cross-study comparison
# (overwrites the 4-factor row stored earlier under the same name).
sona_factorFit <- data.frame(sample = "mTurk_l", factors = 6, items = 26, observation = 845, chi = fa_new_6$chi, BIC = fa_new_6$BIC, fit = fa_new_6$fit, RMSEA = fa_new_6$RMSEA[1], cumVar = max(fa_new_6$Vaccounted[3,]), complexity = mean(fa_new_6$complexity))
#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)
I double checked the labels but it’s still very weird
# create a hierarchical factor structure organization
# bassAckward relates the 4- and 6-factor Pearson solutions hierarchically.
ba <- bassAckward(corrM_raw, c(4,6), rotate = "promax", scores = "Bartlett", adjust = T, cut = 0.3, use = "pairwise", plot = F)
# re-label the factors with the factor names
ba$labels[[1]] <- c("Value", "Clarity", "External", "Consensus")
ba$labels[[2]] <- c("Value","External", "Attainability", "Instrumentality", "Consensus", "Measurability")
# create the figure (only loadings above |.3| are drawn)
bassAckward.diagram(ba,digits=2,cut = .3,labels=NULL,marg=c(1.5,.5,1.0,.5),
main="BassAckward",items=TRUE,sort=TRUE,lr=F,curves=F,organize=T)
Because the distributions of several variables were very skewed, it might be better to use polychoric correlations rather than Pearson’s correlations.
Generate the correlation matrix with polychoric correlation
# Generate the correlation matrix with polychoric correlation
# (psych::mixedCor chooses the appropriate correlation per variable pair;
# $rho extracts the correlation matrix itself)
corrM_pc <- mixedCor(goalDf_EFA)$rho
# export the correlation matrix
write.csv(corrM_pc, here("Baseline", "Outputs", "baseline_corrRating_pc.csv"), row.names = FALSE)
# use Very Simple Structure criterion (polychoric matrix)
res_vss <- psych::nfactors(
  corrM_pc, n = 10, rotate = "promax", diagonal = FALSE, fm = "minres",
  n.obs = 845, title = "Very Simple Structure", use = "pairwise", cor = "cor"
)
# select useful parameters and organize them into a table
vss_table <- cbind(1:10, res_vss$map) %>%
  as_tibble() %>%
  rename(factor = V1, map = V2) %>%
  cbind(res_vss$vss.stats)
vss_table %>%
  select(factor, map, fit, complex, eChisq, SRMR, eCRMS, eBIC, eRMS) %>%
  kable(format = "html", escape = FALSE, caption = "VSS output -mTurk") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | map | fit | complex | eChisq | SRMR | eCRMS | eBIC | eRMS |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.0239499 | 0.6161719 | 1.000000 | 7546.9805 | 0.1172200 | 0.1222103 | 5531.9189 | 0.1172200 |
| 2 | 0.0208194 | 0.7202552 | 1.281638 | 3834.3235 | 0.0835525 | 0.0909967 | 1987.7453 | 0.0835525 |
| 3 | 0.0191384 | 0.6835308 | 1.358198 | 2586.4517 | 0.0686226 | 0.0782418 | 901.6176 | 0.0686226 |
| 4 | 0.0197582 | 0.6974179 | 1.563847 | 1698.6159 | 0.0556112 | 0.0665413 | 168.7865 | 0.0556112 |
| 5 | 0.0196725 | 0.6773479 | 1.772831 | 1049.8206 | 0.0437192 | 0.0550475 | -331.7434 | 0.0437192 |
| 6 | 0.0205316 | 0.6586235 | 1.788499 | 625.2586 | 0.0337400 | 0.0448412 | -614.7794 | 0.0337400 |
| 7 | 0.0227019 | 0.6344446 | 1.948457 | 443.9486 | 0.0284303 | 0.0400222 | -661.3027 | 0.0284303 |
| 8 | 0.0254301 | 0.6271577 | 1.938379 | 322.3286 | 0.0242250 | 0.0362679 | -654.8752 | 0.0242250 |
| 9 | 0.0280223 | 0.6369919 | 1.950227 | 211.0743 | 0.0196035 | 0.0313597 | -644.8215 | 0.0196035 |
| 10 | 0.0319250 | 0.5660937 | 1.947342 | 132.6798 | 0.0155424 | 0.0267155 | -608.6472 | 0.0155424 |
# Scree plot: count factors with eigenvalues > 1 and compare against
# the parallel-analysis criterion (polychoric matrix).
ev <- eigen(corrM_pc)
# Parallel analysis: eigenvalues expected under random data (100 reps)
ap <- parallel(subject=nrow(goalDf_EFA),var=ncol(goalDf_EFA),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
# extract 4 factors
# Promax-rotated minres EFA on the polychoric correlation matrix.
fa_pc_4 <-fa(r=corrM_pc, nfactors=4,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")
fa.diagram(fa_pc_4)
The factors remain the same but some variables have shifted, such as intrinsic motivation
# visualization
# Bar plot of sorted factor loadings, one facet per factor.
loadings <- fa.sort(fa_pc_4)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","Clarity", "External", "Consensus")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")
# Fix facet order and item order (rows reversed so the first item appears
# at the top after coord_flip).
colOrder <- c("Value","Clarity", "External", "Consensus")
rowOrder <- rev(rownames(loadings))
# BUG FIX: the original relied on partial argument matching (`leve=`);
# spell out `levels=` explicitly.
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + #place the factors in separate facets
  geom_bar(stat="identity") + #make the bars
  coord_flip() + #flip the axes so the test names can be horizontal
  #define the fill color gradient: orange=positive, blue=negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Four Factor Solution from mTurk Longitudinal") +
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") + # salience cutoff
  theme_bw(base_size=10)
interfactor correlation:
Less inter-correlated than the previous dataset
# Inter-factor correlations (Phi) from the promax rotation, relabelled
# with the factor names and rendered as an HTML table.
# FIX: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble();
# also corrected the "Cansensus" typo to "Consensus".
fa_pc_4$Phi %>%
  as_tibble() %>%
  dplyr::rename(Value = MR1, Clarity = MR2, External = MR3, Consensus = MR4) %>%
  round(., 2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = FALSE, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | Value | External | Clarity | Cansensus |
|---|---|---|---|---|
| Value | 1.00 | 0.40 | 0.32 | 0.36 |
| External | 0.40 | 1.00 | -0.04 | 0.13 |
| Clarity | 0.32 | -0.04 | 1.00 | 0.05 |
| Cansensus | 0.36 | 0.13 | 0.05 | 1.00 |
It fits better than the solution based on Pearson correlations.
# Model-fit summary for the 4-factor polychoric solution, rendered as HTML.
fit_row <- data.frame(
  sample = "mTurk_l", factors = 4, items = 26, observation = 845,
  chi = fa_pc_4$chi, BIC = fa_pc_4$BIC, fit = fa_pc_4$fit,
  RMSEA = fa_pc_4$RMSEA[1], cumVar = max(fa_pc_4$Vaccounted[3,]),
  complexity = mean(fa_pc_4$complexity)
)
fit_row %>%
  remove_rownames() %>%
  kable(format = "html", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center")
| sample | factors | items | observation | chi | BIC | fit | RMSEA | cumVar | complexity |
|---|---|---|---|---|---|---|---|---|---|
| mTurk_l | 4 | 26 | 845 | 1698.616 | 515.3963 | 0.8242109 | 0.0973533 | 0.4096362 | 1.563847 |
# Store the fit statistics for cross-study comparison.
# CONSISTENCY FIX: label the sample "mTurk_l" to match the displayed table
# above (the original used "mTurk" here but "mTurk_l" in the table).
longitudinal_factorFit <- data.frame(sample = "mTurk_l", factors = 4, items = 26, observation = 845, chi = fa_pc_4$chi, BIC = fa_pc_4$BIC, fit = fa_pc_4$fit, RMSEA = fa_pc_4$RMSEA[1], cumVar = max(fa_pc_4$Vaccounted[3,]), complexity = mean(fa_pc_4$complexity))
#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)
# extract 6 factors
# Promax-rotated minres EFA on the polychoric correlation matrix.
fa_pc_6 <-fa(r=corrM_pc, nfactors=6,n.obs = 845, rotate="promax", SMC=FALSE, fm="minres")
fa.diagram(fa_pc_6)
# visualization
# Bar plot of sorted factor loadings, one facet per factor.
loadings <- fa.sort(fa_pc_6)$loadings
loadings <- as.data.frame(unclass(loadings))
colnames(loadings) <- c("Value","External", "Attainability", "Consensus", "Measurability", "Instrumentality")
loadings$Items <- rownames(loadings)
loadings.m <- loadings %>% gather(-Items, key = "Factor", value = "Loading")
# Fix facet order and item order (rows reversed so the first item appears
# at the top after coord_flip).
colOrder <- c("Value","External", "Attainability", "Consensus", "Measurability", "Instrumentality")
rowOrder <- rev(rownames(loadings))
# BUG FIX: the original relied on partial argument matching (`leve=`);
# spell out `levels=` explicitly.
loadings.m <- arrange(mutate(loadings.m, Items = factor(Items, levels = rowOrder)), Items)
loadings.m <- arrange(mutate(loadings.m, Factor = factor(Factor, levels = colOrder)), Factor)
ggplot(loadings.m, aes(Items, abs(Loading), fill=Loading)) +
  facet_wrap(~ Factor, nrow=1) + #place the factors in separate facets
  geom_bar(stat="identity") + #make the bars
  coord_flip() + #flip the axes so the test names can be horizontal
  #define the fill color gradient: orange=positive, blue=negative
  scale_fill_gradient2(name = "Loading",
                       high = "orange", mid = "white", low = "midnightblue",
                       midpoint=0, guide="colourbar") +
  ylab("Loading Strength") +
  ggtitle("Six Factor Solution from mTurk Longitudinal") +
  geom_hline(yintercept = 0.3, color = "red", linetype="dotted") + # salience cutoff
  theme_bw(base_size=10)
interfactor correlation:
The value and instrumentality factors have the highest correlation.
# Inter-factor correlations (Phi) from the promax rotation, relabelled
# with the factor names and rendered as an HTML table.
# FIX: as.tibble() is deprecated since tibble 2.0.0 -> as_tibble().
fa_pc_6$Phi %>%
  as_tibble() %>%
  dplyr::rename(Value = MR1, External = MR2, Attainability = MR3, Consensus = MR4, Measurability = MR5, Instrumentality = MR6) %>%
  round(., 2) %>%
  remove_rownames() %>%
  mutate(factor = colnames(.)) %>%
  select(factor, everything()) %>%
  kable(format = "html", escape = FALSE, caption = "<center>Factor Correlation of mTurk Longitudinal</center>") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center", fixed_thead = TRUE)
| factor | Value | External | Attainability | Consensus | Measurability | Instrumentality |
|---|---|---|---|---|---|---|
| Value | 1.00 | 0.27 | 0.39 | 0.43 | 0.16 | 0.51 |
| External | 0.27 | 1.00 | -0.09 | 0.16 | -0.18 | 0.04 |
| Attainability | 0.39 | -0.09 | 1.00 | 0.22 | 0.17 | 0.26 |
| Consensus | 0.43 | 0.16 | 0.22 | 1.00 | 0.40 | 0.36 |
| Measurability | 0.16 | -0.18 | 0.17 | 0.40 | 1.00 | 0.13 |
| Instrumentality | 0.51 | 0.04 | 0.26 | 0.36 | 0.13 | 1.00 |
it fits better than the one with Pearson
# Model-fit summary for the 6-factor polychoric solution, rendered as HTML.
fit_row <- data.frame(
  sample = "mTurk_l", factors = 6, items = 26, observation = 845,
  chi = fa_pc_6$chi, BIC = fa_pc_6$BIC, fit = fa_pc_6$fit,
  RMSEA = fa_pc_6$RMSEA[1], cumVar = max(fa_pc_6$Vaccounted[3,]),
  complexity = mean(fa_pc_6$complexity)
)
fit_row %>%
  remove_rownames() %>%
  kable(format = "html", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE, position = "center")
| sample | factors | items | observation | chi | BIC | fit | RMSEA | cumVar | complexity |
|---|---|---|---|---|---|---|---|---|---|
| mTurk_l | 6 | 26 | 845 | 625.2586 | -124.0245 | 0.8781573 | 0.0774146 | 0.4862908 | 1.788499 |
# Store the 6-factor fit statistics for cross-study comparison
# (overwrites the 4-factor row stored earlier under the same name).
longitudinal_factorFit <- data.frame(sample = "mTurk_l", factors = 6, items = 26, observation = 845, chi = fa_pc_6$chi, BIC = fa_pc_6$BIC, fit = fa_pc_6$fit, RMSEA = fa_pc_6$RMSEA[1], cumVar = max(fa_pc_6$Vaccounted[3,]), complexity = mean(fa_pc_6$complexity))
#write.csv(sona_factorFit, "./outputs/mTurk2_factorFit.csv", row.names = F)
I double checked the labels but it’s still very weird
# create a hierarchical factor structure organization
# bassAckward relates the 4- and 6-factor polychoric solutions hierarchically.
ba <- bassAckward(corrM_pc, c(4,6), rotate = "promax", scores = "Bartlett", adjust = T, cut = 0.3, use = "pairwise", plot = F)
# re-label the factors with the factor names
ba$labels[[1]] <- c("Value", "Clarity", "External", "Consensus")
ba$labels[[2]] <- c("Value","External", "Attainability", "Consensus", "Measurability", "Instrumentality")
# create the figure (only loadings above |.3| are drawn)
bassAckward.diagram(ba,digits=2,cut = .3,labels=NULL,marg=c(1,0.2,1.0,0.2),
main="BassAckward",items=TRUE,sort=TRUE,lr=F,curves=F,organize=T)
I will use the output from the polychoric correlation for the following analysis
Evaluate the indeterminacy of each factor based on the square of the multiple correlation between each factor and the original variable. (range from 0 to 1)
# Factor-score indeterminacy: squared multiple correlation between each
# factor and the observed variables (closer to 1 = more determinate).
fa_pc_4$R2
## [1] 0.9124096 0.8373077 0.8067033 0.6875801
Evaluate the indeterminacy of each factor based on the minimum possible correlation between two sets of competing factor scores, $2R^2 - 1$ (range from $-1$ to $1$).
# Minimum possible correlation between two sets of competing factor
# scores: 2 * R^2 - 1 (range -1 to 1).
2 * fa_pc_4$R2 -1
## [1] 0.8248192 0.6746154 0.6134067 0.3751601
# Composite factor scores: unit-weighted means of the items loading on
# each factor (reverse-keyed items entered with a minus sign).
# PERFORMANCE FIX: the original wrapped these pipelines in rowwise(),
# forcing per-row evaluation; every expression is plain elementwise
# arithmetic, so vectorized mutate() yields identical values far faster
# (and returns a plain tibble/data frame rather than a rowwise_df).
# use the 4-factor model
factorScoreDf_4f <- goalDf_R_wide %>%
  mutate(Value = (attractiveness_achievement + attractiveness_progress + ideal_motivation + importance + construal_level + identified_motivation + meaningfulness +
                    instrumentality + difficulty + connectedness) / 10,
         Clarity = (clarity + attainability + measurability + affordance + specificity - conflict + control) / 7,
         External = (ought_motivation + external_motivation + external_importance + introjected_motivation + visibility) / 5,
         Consensus = (-intrinsic_motivation + commonality + basic_needs + social_desirability) / 4) %>%
  select(MTurkCode, goal, goalType, Value, Clarity, External, Consensus)
# use the 6-factor model
factorScoreDf_6f <- goalDf_R_wide %>%
  mutate(Value = (attractiveness_achievement + attractiveness_progress + importance + construal_level + identified_motivation + ideal_motivation + meaningfulness) / 7,
         External = (ought_motivation + external_motivation + introjected_motivation + external_importance + conflict + visibility) / 6,
         Attainability = (attainability - difficulty + clarity + affordance) / 4,
         Consensus = (commonality + basic_needs + social_desirability - intrinsic_motivation) / 4,
         Measurability = (specificity + measurability) / 2,
         Instrumentality = (connectedness + instrumentality + control) / 3) %>%
  select(MTurkCode, goal, goalType, Value, External, Attainability, Consensus, Measurability, Instrumentality)
# write the csv factor score datasets
write.csv(factorScoreDf_4f, here("Baseline", "Outputs", "factorScoreDf_4f.csv"), row.names = FALSE)
write.csv(factorScoreDf_6f, here("Baseline", "Outputs", "factorScoreDf_6f.csv"), row.names = FALSE)
# transform to z-score
# BUG FIX: scale() returns an n x 1 matrix; storing that in a data frame
# creates matrix columns that break many downstream operations. Wrapping
# it in as.numeric() keeps each column a plain numeric vector with
# identical values.
goalDf_EFA_z <- goalDf_EFA %>%
  mutate(across(where(is.numeric), ~ as.numeric(scale(.x, center = TRUE, scale = TRUE))))